home *** CD-ROM | disk | FTP | other *** search
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; ;;
- ;;; Centre for Speech Technology Research ;;
- ;;; University of Edinburgh, UK ;;
- ;;; Copyright (c) 1998 ;;
- ;;; All Rights Reserved. ;;
- ;;; ;;
- ;;; Permission is hereby granted, free of charge, to use and distribute ;;
- ;;; this software and its documentation without restriction, including ;;
- ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
- ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
- ;;; permit persons to whom this work is furnished to do so, subject to ;;
- ;;; the following conditions: ;;
- ;;; 1. The code must retain the above copyright notice, this list of ;;
- ;;; conditions and the following disclaimer. ;;
- ;;; 2. Any modifications must be clearly marked as such. ;;
- ;;; 3. Original authors' names are not deleted. ;;
- ;;; 4. The authors' names are not used to endorse or promote products ;;
- ;;; derived from this software without specific prior written ;;
- ;;; permission. ;;
- ;;; ;;
- ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
- ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
- ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
- ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
- ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
- ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
- ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
- ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
- ;;; THIS SOFTWARE. ;;
- ;;; ;;
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;;
- ;;; Functions for building LTS rules sets from lexicons
- ;;;
- ;;;
-
- (defvar pl-table nil)
-
- (define (allaligns phones letters)
- "(cummulate phones lets)
- Aligns all possible ways for these strings."
- (cond
- ((null letters)
- ;; (wrongly) assume there are never less letters than phones
- (if phones
- (format t "wrong end: %s\n" word))
- nil)
- ((null phones)
- nil)
- (t
- (if (< (length phones) (length letters))
- (begin
- (cummulate '_epsilon_ (car letters))
- (allaligns phones (cdr letters))))
- (cummulate (car phones) (car letters))
- (allaligns (cdr phones) (cdr letters)))))
-
- (define (valid-pair phone letter)
- "(valid-pair phone letter)
- If predefined to be valid."
- (let ((entry1 (assoc letter pl-table)))
- (if entry1
- (assoc phone (cdr entry1))
- nil)))
-
-
- (define (valid-pair-e phone nphone letter)
- "(valid-pair-e phone letter)
- Special cases for when epsilon may be inserted before letter."
- (let ((ll (assoc letter pl-table))
- (pp (intern (string-append phone "-" nphone))))
- (assoc pp (cdr ll))))
-
- (define (find-aligns phones letters)
- "(find-aligns phones letters)
- Find all feasible alignments."
- (let ((r nil))
- (cond
- ((and (null (cdr phones)) (null (cdr letters))
- (equal? (car phones) (car letters))
- (equal? '# (car phones)))
- (list (list (cons '# '#)))) ;; valid end match
- (t
- (if (valid-pair '_epsilon_ (car letters))
- (set! r (mapcar
- (lambda (p)
- (cons (cons '_epsilon_ (car letters)) p))
- (find-aligns phones (cdr letters)))))
- (if (valid-pair (car phones) (car letters))
- (set! r
- (append r
- (mapcar
- (lambda (p)
- (cons (cons (car phones) (car letters)) p))
- (find-aligns (cdr phones) (cdr letters))))))
- ;; Hmm, change this to always check doubles
- (if (valid-pair-e (car phones) (car (cdr phones)) (car letters))
- (set! r
- (append r
- (mapcar
- (lambda (p)
- (cons (cons (intern (format nil "%s-%s"
- (car phones)
- (car (cdr phones))))
- (car letters)) p))
- (find-aligns (cdr (cdr phones))
- (cdr letters))))))
- r))))
-
- (define (findallaligns phones letters)
- (let ((a (find-aligns phones letters)))
- (if (null a)
- (format t "failed: %l %l\n" letters phones))
- a))
-
- (define (cummulate phone letter)
- "(cummulate phone letter)
- record the alignment of this phone and letter."
- (if (or (equal? phone letter)
- (and (not (equal? phone '#))
- (not (equal? letter '#))))
- (let ((entry1 (assoc letter pl-table))
- score)
- (if (equal? phone '_epsilon_)
- (set! score 0.1)
- (set! score 1))
- (if entry1
- (let ((entry2 (assoc phone (cdr entry1))))
- (if entry2
- (set-cdr! entry2 (+ score (cdr entry2)))
- (set-cdr! entry1 (cons (cons phone 1) (cdr entry1)))))
- (set! pl-table
- (cons
- (cons letter
- (list (cons phone score)))
- pl-table)))
- t)))
-
- (define (score-pair phone letter)
- "(score-pair phone letter)
- Give score for this particular phone letter pair."
- (let ((entry1 (assoc letter pl-table)))
- (if entry1
- (let ((entry2 (assoc phone (cdr entry1))))
- (if entry2
- (cdr entry2)
- 0))
- 0)))
-
- (define (cummulate-aligns aligns)
- (mapcar
- (lambda (a)
- (mapcar
- (lambda (p)
- (cummulate (car p) (cdr p)))
- a))
- aligns)
- t)
-
- (define (cummulate-pairs trainfile)
- "(cummulate-pairs trainfile)
- Build cummulatation table from allowable alignments in trainfile."
- (if (not pl-table)
- (set! pl-table
- (mapcar
- (lambda (l)
- (cons (car l) (mapcar (lambda (c) (cons c 0)) (cdr l))))
- allowables)))
- (let ((fd (fopen trainfile "r"))
- (c 0) (d 0)
- (entry))
- (while (not (equal? (set! entry (readfp fd)) (eof-val)))
- (if (equal? c 1000)
- (begin
- (format t "ENTRY: %d %l\n" (set! d (+ 1000 d)) entry)
- (set! c 0)))
- (set! word (car entry))
- (cummulate-aligns
- (findallaligns
- (enworden (car (cdr (cdr entry))))
- (enworden (symbolexplode (car entry)))))
- (set! c (+ 1 c)))
- (fclose fd)))
-
- (define (find_best_alignment phones letters)
- "(find_best_alignment phones letters)
- Find the alignement containg the most frequent alignment pairs."
- ;; hackily do this as a global
- (set! fba_best_score 0)
- (set! fba_best nil)
- (find-best-align phones letters nil 0)
- fba_best
- )
-
- (define (find-best-align phones letters path score)
- "(find-best-align phones letters)
- Find all feasible alignments."
- (cond
- ((null letters)
- (if (> score fba_best_score)
- (begin
- (set! fba_best_score score)
- (set! fba_best (reverse path))))
- nil)
- (t
- (if (valid-pair '_epsilon_ (car letters))
- (find-best-align phones (cdr letters)
- (cons (cons '_epsilon_ (car letters)) path)
- (+ score (score-pair '_epsilon_ (car letters)))))
- (if (valid-pair (car phones) (car letters))
- (find-best-align (cdr phones) (cdr letters)
- (cons (cons (car phones) (car letters))path)
- (+ score (score-pair (car phones) (car letters)))))
- (if (valid-pair-e (car phones) (car (cdr phones)) (car letters))
- (find-best-align (cdr (cdr phones)) (cdr letters)
- (cons (cons (intern (format nil "%s-%s"
- (car phones)
- (car (cdr phones))))
- (car letters))
- path)
- (+ score (score-pair
- (intern (format nil "%s-%s"
- (car phones)
- (car (cdr phones))))
- (car letters))))))))
-
- (define (align_and_score phones letters path score)
- "(align_and_score phones lets)
- Aligns all possible ways for these strings."
- (cond
- ((null letters)
- (if (> score fba_best_score)
- (begin
- (set! fba_best_score score)
- (set! fba_best (reverse path))))
- nil)
- (t
- (if (< (length phones) (length letters))
- (align_and_score
- phones
- (cdr letters)
- (cons '_epsilon_ path)
- (+ score
- (score-pair '_epsilon_ (car letters)))))
- (align_and_score
- (cdr phones)
- (cdr letters)
- (cons (car phones) path)
- (+ score
- (score-pair (car phones) (car letters)))))))
-
- (define (aligndata file ofile)
- (let ((fd (fopen file "r"))
- (ofd (fopen ofile "w"))
- (c 1)
- (entry))
- (while (not (equal? (set! entry (readfp fd)) (eof-val)))
- (set! lets (enworden (symbolexplode (car entry))))
- (set! bp (find_best_alignment
- (enworden (car (cdr (cdr entry))))
- lets))
- (if (not bp)
- (format t "failed: %l\n" lets)
- (save_info (car (cdr entry)) bp ofd))
- (set! c (+ 1 c)))
- (fclose fd)
- (fclose ofd)))
-
- (define (enworden lets)
- (cons '# (reverse (cons '# (reverse lets)))))
-
- (define (save_info pos bp ofd)
- "(save_info pos bp ofd)
- Cut out one expensive step and 50M of diskspace and just save it
- in a simpler format."
- (format ofd "( ")
- (mapcar
- (lambda (l)
- (if (not (string-equal "#" (cdr l)))
- (format ofd "%s" (cdr l))))
- bp)
- (format ofd " %s" pos)
- (mapcar
- (lambda (l)
- (if (not (string-equal "#" (car l)))
- (format ofd " %s" (car l))))
- bp)
- (format ofd " )\n"))
-
- (define (normalise-table pl-table)
- "(normalise-table pl-table)
- Change scores into probabilities."
- (mapcar
- (lambda (s)
- (let ((sum (apply + (mapcar cdr (cdr s)))))
- (mapcar
- (lambda (p)
- (set-cdr! p (/ (cdr p) sum)))
- (cdr s))))
- pl-table)
- t)
-
- (define (save-table pre)
- (normalise-table pl-table)
- (set! fd (fopen (string-append pre "pl-tablesp.scm") "w"))
- (format fd "(set! pl-table '\n")
- (pprintf pl-table fd)
- (format fd ")\n")
- (fclose fd)
- t)
-
- (define (build-feat-file alignfile featfile)
- "(build-feat-file alignfile featfile)
- Build a feature file from the given align file. The feature
- file contain predicted phone, and letter with 3 preceding and
- 3 succeeding letters."
- (let ((fd (fopen alignfile "r"))
- (ofd (fopen featfile "w"))
- (entry)
- (pn)
- (sylpos 1))
- (while (not (equal? (set! entry (readfp fd)) (eof-val)))
- (set! lets (append '(0 0 0 0 #) (symbolexplode (car entry))
- '(# 0 0 0 0)))
- (set! phones (cdr (cdr entry)))
- (set! pn 5)
- (mapcar
- (lambda (p)
- (format ofd
- "%s %s %s %s %s %s %s %s %s %s %s\n"
- p
- (nth (- pn 4) lets)
- (nth (- pn 3) lets)
- (nth (- pn 2) lets)
- (nth (- pn 1) lets)
- (nth pn lets)
- (nth (+ pn 1) lets)
- (nth (+ pn 2) lets)
- (nth (+ pn 3) lets)
- (nth (+ pn 4) lets)
- (car (cdr entry)) ;; pos
- ;; sylpos
- ;; numsyls
- ;; num2end
- )
- (set! pn (+ 1 pn)))
- phones))
- (fclose fd)
- (fclose ofd))
- )
-
- (define (merge_models name filename)
- "(merge_models name filename)
- Merge the models into a single list of cart trees as a variable
- named by name, in filename."
- (require 'cart_aux)
- (let (trees fd)
- (set! trees
- (mapcar
- (lambda (l)
- (format t "%s\n" l)
- (set! tree (car (load (format nil "lts.%s.tree" l) t)))
- (set! tree (cart_simplify_tree tree nil))
- (list l tree))
- '(a b c d e f g h i j k l m n o p q r s t u v w x y z )
- ))
- (set! fd (fopen filename "w"))
- (format fd ";; LTS rules \n")
- (format fd "(set! %s '\n" name)
- (pprintf trees fd)
- (format fd ")\n")
- (fclose fd))
- )
-
- (define (lts_testset file cartmodels)
- "(lts_testset file cartmodels)
- Test an aligned lexicon file against a set of cart trees. Prints out
- The number of letters correct (for each letter), total number of
- letters correct and the total number of words correct. cartmodels is
- the structure as saved by merge_models."
- (let ((fd (fopen file "r"))
- (entry)
- (wordcount 0)
- (correctwords 0)
- (phonecount 0)
- (correctphones 0))
- (while (not (equal? (set! entry (readfp fd)) (eof-val)))
- (let ((letters (enworden (symbolexplode (car entry))))
- (phones (enworden (cdr (cdr entry))))
- (pphones))
- (set! wordcount (+ 1 wordcount))
- (set! pphones (gen_cartlts letters (car (cdr entry)) cartmodels))
- ; (set! pphones (gen_vilts letters (car (cdr entry))
- ; cartmodels wfstname))
- (if (equal? (ph-normalize pphones) (ph-normalize phones))
- (set! correctwords (+ 1 correctwords))
- (or nil
- (format t "failed %s %l %l\n" (car entry) phones pphones)))
- (count_correct_letters ;; exclude #, cause they're always right
- (cdr letters)
- (cdr phones)
- (cdr pphones))
- (set! phonecount (+ (length (cdr (cdr letters))) phonecount))
- ))
- (fclose fd)
- (mapcar
- (lambda (linfo)
- (format t "%s %d correct %d (%2.2f)\n"
- (car linfo) (car (cdr linfo))
- (car (cdr (cdr linfo)))
- (/ (* (car (cdr (cdr linfo))) 100) (car (cdr linfo))))
- (set! correctphones (+ correctphones (car (cdr (cdr linfo))))))
- correct_letter_table)
- (format t "phones %d correct %d (%2.2f)\n"
- phonecount correctphones (/ (* correctphones 100) phonecount))
- (format t "words %d correct %d (%2.2f)\n"
- wordcount correctwords (/ (* correctwords 100) wordcount))
- (format t "tree model has %d nodes\n"
- (apply + (mapcar (lambda (a) (cart_tree_node_count (car (cdr a))))
- cartmodels)))
- ))
-
- (define (cart_tree_node_count tree)
- "(tree_node_count tree)
- Count the number nodes (questions and leafs) in the given CART tree."
- (cond
- ((cdr tree)
- (+ 1
- (cart_tree_node_count (car (cdr tree)))
- (cart_tree_node_count (car (cdr (cdr tree))))))
- (t
- 1)))
-
- (defvar correct_letter_table
- (mapcar
- (lambda (l) (list l 0 0))
- '(a b c d e f g h i j k l m n o p q r s t u v w x y z))
- "correct_letter_table
- List used to cummulate the number of correct (and incorrect) letter to
- phone predictions. This list will be extended if there are more letters
- in your alphabet, though it doesn't take a fairly western european
- view of the alphabet, but you can change this yourself is necessary.")
-
- (define (count_correct_letters lets phs pphs)
- "(count_correct_letters lets phs pphs)
- Count which letters have the correct phone prediction. Cummulate this
- is a per letter table."
- (cond
- ((or (null phs) (null pphs) (null lets))
- (format t "misaligned entry\n")
- nil)
- ((and (null (cdr lets)) (null (cdr phs)) (null (cdr pphs)))
- nil) ;; omit final #
- (t
- (let ((letinfo (assoc (car lets) correct_letter_table)))
- (if (not letinfo)
- (set! correct_letter_table
- (append correct_letter_table
- (set! letinfo (list (list (car lets) 0 0))))))
- (set-car! (cdr letinfo) (+ 1 (car (cdr letinfo)))) ;; total
- (if (equal? (car phs) (car pphs)) ;; correct
- (set-car! (cdr (cdr letinfo)) (+ 1 (car (cdr (cdr letinfo))))))
- (count_correct_letters (cdr lets) (cdr phs) (cdr pphs))))))
-
- (define (ph-normalize ph)
- (cond
- ((null ph) nil)
- ((string-equal "_epsilon_" (car ph))
- (ph-normalize (cdr ph)))
- ((string-matches (car ph) ".*-.*")
- (cons
- (string-before (car ph) "-")
- (cons
- (string-after (car ph) "-")
- (ph-normalize (cdr ph)))))
- (t
- (cons (car ph) (ph-normalize (cdr ph))))))
-
- (define (make_let_utt_p letters pos)
- "(make_let_utt letters)
- Build an utterances from th4ese letters."
- (let ((utt (Utterance Text "")))
- (utt.relation.create utt 'LTS)
- (utt.relation.create utt 'LETTER)
- (utt.relation.create utt 'PHONE)
- ;; Create letter stream
- (mapcar
- (lambda (l)
- (let ((lsi (utt.relation.append utt 'LETTER)))
- (item.set_name lsi l)
- (item.set_feat lsi "pos" pos)))
- letters)
- utt))
-
- (define (gen_vilts letters pos cartmodels ngram)
- "(get_vilts letters pos cartmodels ngram)
- Use cart plus ngrams in viterbi search."
- (require 'lts)
- (let ((utt (make_let_utt_p letters pos)))
- (set! gen_vit_params
- (list
- (list 'Relation "LETTER")
- (list 'return_feat "phone")
- (list 'p_word "#")
- (list 'pp_word "0")
- (list 'wfstname ngram)
- (list 'cand_function 'lts_cand_function)))
- (Gen_Viterbi utt)
- (mapcar
- (lambda (lsi)
- (intern (item.feat lsi "phone")))
- (utt.relation.items utt 'LETTER))))
-
- (define (gen_cartlts letters pos cartmodels)
- "(get_cartlts letters cartmodels)
- Generate the full list of predicted phones, including
- epsilon and unexpanded multi-phones."
- (require 'lts)
- (let ((utt (make_let_utt_p letters pos)))
- (enworden
- (mapcar
- (lambda (lsi)
- (let ((tree (car (cdr (assoc_string (item.name lsi) cartmodels))))
- (p))
- (if (not tree)
- (begin
- (format t "failed to find tree for %s\n" (item.name lsi))
- nil)
- (begin
- (set! p (wagon_predict lsi tree))
- (item.set_feat lsi "val" p)
- p))))
- (reverse (cdr (reverse (cdr (utt.relation.items utt 'LETTER)))))))))
-
- (define (reduce_lexicon entryfile exceptionfile lts_function)
- "(reduce_lexicon entryfile exceptionfile lts_function)
- Look up each word in entryfile using the current lexicon, if the entry
- doesn't match save it in the exception file. This is a way of reducing
- the lexicon based on a letter to sound model (and lexical stress
- model, if appropriate)."
- (let ((fd (fopen entryfile "r"))
- (ofd (fopen exceptionfile "w"))
- (entry)
- (wordcount 0)
- (correctwords 0))
- (while (not (equal? (set! entry (readfp fd)) (eof-val)))
- (if (and (consp entry)
- (> (length entry) 1))
- (let ((lts (lts_function (car entry) (car (cdr entry))))
- (encount (lex.entrycount (car entry))))
- (set! wordcount (+ 1 wordcount))
- (if (and (equal? (nth 2 entry) (nth 2 lts))
- (< encount 2))
- (set! correctwords (+ 1 correctwords))
- (format ofd "%l\n" entry))
- )))
- (fclose fd)
- (fclose ofd)
- (format t "words %d correct %d (%2.2f)\n"
- wordcount correctwords (/ (* correctwords 100) wordcount))
- ))
-
- (provide 'lts_build)
-
-